 ************************************************
* Sleep Project - Pedro Bessone, Gautam Rao, Heather Schofield, Frank Schilbach, and Mattie Toma
* Purpose: Produces the randomization inference p-values for Appendix Table 8 (Main Treatment Effects, Pooling Night-Sleep Treatments and Including Multiple Hypothesis Testing Corrections)
* Last edited: 07 May 2021
************************************************


clear all
set more off
set seed 1
	
	do "$s/anderson_index_program.do"
	
	cap reghdfe, compile
	
***************
*TREATMENT IDS*
***************
	set obs 1000
	
//gen treatment ids
	gen lslp_id = .
	label var lslp_id "LSLP_ID - Low Sleep and Low Productivity" 
	replace lslp_id = _n if _n < round(_N/4)+1
	
	gen lshp_id = . 
	label var lshp_id "LSHP_ID - Low Sleep and High Productivity" 
	replace lshp_id = _n - round(_N/4) if _n < 2*round(_N/4)+1 &_n > round(_N/4)
	
	gen hslp_id = . 
	label var hslp_id "HSLP_ID - High Sleep and Low Productivity" 
	replace hslp_id = _n - 2*round(_N/4) if _n < 3*round(_N/4)+1 &_n > 2*round(_N/4)
	
	gen hshp_id = . 
	label var hshp_id "HSHP_ID - High Sleep and High Productivity"
	replace hshp_id = _n - 3*round(_N/4) if _n > 3*round(_N/4)
	
//sleep group assignments

	*Create sleep treatment group variable
	gen treat_pool = .
	label var treat_pool "Sleep treatment group"
	label define t 0 "Control Group" 1 "Sleepaid Treatment Group" 2 "Sleepaid & Incentive Treatment Group"
	label values treat_pool t
	
	gen rand = runiform()
	
	gen trio_indic = round((lslp_id+1)/3) // In each group, gives the same value to blocks of three observations. 
	replace trio_indic = round((lshp_id+1)/3) if !missing(lshp_id)
	replace trio_indic = round((hslp_id+1)/3) if !missing(hslp_id)
	replace trio_indic = round((hshp_id+1)/3) if !missing(hshp_id)
	
	gen group=1 if !missing(lslp_id)
	replace group=2 if !missing(lshp_id)
	replace group=3 if !missing(hslp_id)
	replace group=4 if !missing(hshp_id)
	
	sort group trio_indic rand
	replace treat_pool=mod(_n,3)
	
	drop trio_indic group
	
	*Generate PID2 variable which is the unique identifier of the order in the different strata
	 
	gen pid2 = 11000+lslp_id if !missing(lslp_id) // between 11001 and 11250, participants are part of the lslp
	replace pid2 = 11250+lshp_id if !missing(lshp_id) //between 11251 and 11500, participants are part of the lshp
	replace pid2 = 11500+hslp_id if !missing(hslp_id) //between 11501 and 11750, participants are part of the hslp
	replace pid2 = 11750+hshp_id if !missing(hshp_id) //between 11751 and 12000, participants are part of the hshp
	
	label var pid2 "Identifier based on the order on which a participant enters one of the strata on treatment day"

//nap group assignments

	*Create a nap treatment group variable (this will help later for the daily schedule)
	gen rand_1 = runiform()
	
	bysort treat_pool (lslp_id lshp_id hslp_id hshp_id) : gen count_temp = _n /*Treating each group differently, hence restarting the counter for each group*/	
	gen nap_group  = round((count_temp)/2) /* Counter for each couple of two cells*/
	
	drop count_temp
	
	bysort treat_pool nap_group (rand_1): gen treat_nap = mod(_n,2) /*First one of the couple sorted by the random variable gets Nap control, second one gets treatment*/
	
	label var treat_nap "Nap treatment group"
	label define n 0 "Nap Treatment Group" 1 "Nap Control Group" // put 1 for control group exceptionally as it makes it easier for the rest of the code. 
	label values treat_nap n
	
	gen treat_s =(treat_pool == 1)
	gen treat_s_i=(treat_pool == 2)
	drop treat_pool
	gen treat_pool = 0
	replace treat_pool = 1 if treat_s == 1 | treat_s_i == 1
	
	drop rand_1
	
//save file
tempfile assignment_ids
save `assignment_ids', replace

***************
*PREP DATA
***************

use "$d/typing_dataset.dta", clear

		egen treat_pool_diff = mean(treat_pool), by (pid)
		replace treat_pool=1 if treat_pool_diff!=0 
		drop treat_pool_diff

		egen _treat_nap = max(treat_nap), by(pid)
		drop treat_nap
		rename  _treat_nap treat_nap
	
		drop if day_in_study==.
		drop if day_in_study==1
		
		rename productivity prod
		rename typing_time_hr typing
		
		bysort pid: egen bsl_earnings_temp = mean(earnings) if day_in_study > 1 & day_in_study < 9
		bysort pid: egen bsl_avg_earnings = max(bsl_earnings_temp)
	
		sort pid date
	
		* Generate strata
	
		* Creating first day in study
		
		bys pid: egen study_join_date = min(date)
	
		* First wave (lasts from beginning until Feb 17,2018 = 21232 in numerical values)
		
		gen first_wave_strata = (study_join_date < 21232)

			*Low Sleep Quality and Low Productivity (LSLP)
			gen lslpid_dummy = .
			replace lslpid_dummy = 1 if bsl_avg_sleep < 5.55 & bsl_avg_earnings < 197 & first_wave_strata==1
				
			*Low Sleep Quality and High Productivity (LSHP)
			gen lshpid_dummy = . 
			replace lshpid_dummy = 1 if bsl_avg_sleep < 5.55 & bsl_avg_earnings >= 197 & first_wave_strata==1
				
			*High Sleep Quality and Low Productivity (HSLP)
			gen hslpid_dummy = .
			replace hslpid_dummy = 1 if bsl_avg_sleep >= 5.55 & bsl_avg_earnings < 197 & first_wave_strata==1
				
			*High Sleep Quality and High Productivity (HSHP)
			gen hshpid_dummy = . 
			replace hshpid_dummy = 1 if bsl_avg_sleep >= 5.55 & bsl_avg_earnings >= 197 & first_wave_strata==1
			
		* Second wave (lasts from Feb 17,2018 = 21232  until Apr 30,2018 = 21304)
		
		gen second_wave_strata = (study_join_date >= 21232 & study_join_date < 21304)		
		
			*Low Sleep Quality and Low Productivity (LSLP)
			replace lslpid_dummy = 1 if bsl_avg_sleep < 5.90 & bsl_avg_earnings < 260 & second_wave_strata==1
				
			*Low Sleep Quality and High Productivity (LSHP)
			replace lshpid_dummy = 1 if bsl_avg_sleep < 5.90 & bsl_avg_earnings >= 260 & second_wave_strata==1
				
			*High Sleep Quality and Low Productivity (HSLP)
			replace hslpid_dummy = 1 if bsl_avg_sleep >= 5.90 & bsl_avg_earnings < 260 & second_wave_strata==1
				
			*High Sleep Quality and High Productivity (HSHP)
			replace hshpid_dummy = 1 if bsl_avg_sleep >= 5.90 & bsl_avg_earnings >= 260 & second_wave_strata==1		
	
		* Third wave (lasts from Apr 30,2018 until end)
		
		gen third_wave_strata = (study_join_date >= 21304)					
		
			*Low Sleep Quality and Low Productivity (LSLP)
			replace lslpid_dummy = 1 if bsl_avg_sleep < 5.63 & bsl_avg_earnings < 251 & third_wave_strata==1
				
			*Low Sleep Quality and High Productivity (LSHP)
			replace lshpid_dummy = 1 if bsl_avg_sleep < 5.63 & bsl_avg_earnings >= 251 & third_wave_strata==1
				
			*High Sleep Quality and Low Productivity (HSLP)
			replace hslpid_dummy = 1 if bsl_avg_sleep >= 5.63 & bsl_avg_earnings < 251 & third_wave_strata==1
				
			*High Sleep Quality and High Productivity (HSHP)
			replace hshpid_dummy = 1 if bsl_avg_sleep >= 5.63 & bsl_avg_earnings >= 251 & third_wave_strata==1
			
		egen tag_pid_post = tag(pid post_treatment)	if day_in_study!=1
		
		keep hshpid_dummy hslpid_dummy lshpid_dummy lslpid_dummy tag_pid_post pid day_in_study prod typing earnings output female long_day age fraction_high post_treatment at_present_check extra_work treat_pool treat_nap

		tempfile labor
		save `labor', replace

***************
*MHT SIMULATIONS
***************

local sim_num = $reps

forval x = 1/`sim_num'{
	
	di "This is iteration `x'"
	
	use `labor', clear
			
		quietly {
		
			cap drop pid2 pid2_temp
			cap drop randnum
			cap drop tag_one
			cap drop b_* se_* tstat*
			cap drop treat_pool treat_nap
			cap drop rand_work
			cap drop rank_work_day
			cap drop post_lunch_act			
		
			*Re-randomize treatment groups 
			
				gen randnum = runiform(0,1)
				sort randnum
				
				gen pid2_temp = . 
				
				bysort pid (day_in_study): gen tag_one = 1 if _n == 1
				bysort lslpid_dummy lshpid_dummy hslpid_dummy hshpid_dummy (tag_one randnum): replace pid2_temp=11000+_n if lslpid_dummy==1 & tag_one == 1
				bysort lslpid_dummy lshpid_dummy hslpid_dummy hshpid_dummy (tag_one randnum): replace pid2_temp=11250+_n if lshpid_dummy==1 & tag_one == 1
				bysort lslpid_dummy lshpid_dummy hslpid_dummy hshpid_dummy (tag_one randnum): replace pid2_temp=11500+_n if hslpid_dummy==1 & tag_one == 1
				bysort lslpid_dummy lshpid_dummy hslpid_dummy hshpid_dummy (tag_one randnum): replace pid2_temp=11750+_n if hshpid_dummy==1 & tag_one == 1
				
				bysort pid (day_in_study): egen pid2 = max(pid2_temp)
				
				drop randnum 
				
				merge m:1 pid2 using "`assignment_ids'", nogen
				
				gen treat_pool_cell = treat_pool*(1-treat_nap)
				gen treat_nap_cell = treat_nap*(1-treat_pool)
				gen treat_int = treat_pool*treat_nap
				gen treat_int_cell = treat_int 
				
				*Re-randomize break/work
				cap drop extra_work
				gen rand_work = runiform() if (day_in_study != 1 & day_in_study != 28)

				bys pid : egen rank_work_day = rank(rand_work) 
	
				gen post_lunch_act = mod(rank_work_day,2)
				
				gen extra_work = (post_lunch_act==1 & treat_nap==0) if day_in_study!=28 //Day 28 they were free to choose
	
	collapse treat_pool treat_nap treat_pool_cell treat_nap_cell treat_int_cell extra_work, by(pid day_in_study)
	keep pid day_in_study treat_pool treat_nap treat_pool_cell treat_nap_cell treat_int_cell extra_work
	tempfile rand
	save `rand', replace
	
***********************
*LABOR SUPPLY OUTCOMES*
***********************

	use "$d/typing_dataset.dta", clear
	drop treat_pool treat_nap extra_work
	merge 1:1 pid day_in_study using `rand', nogen

	drop if day_in_study==.
	drop if day_in_study==1

	rename productivity prod
	rename typing_time_hr typing

		*Standardize
		foreach var in prod typing earnings output {
			sum `var' if treat_pool == 0 & treat_nap == 0 & post_treatment==1
			gen `var'_post_std = (`var' - r(mean)) / r(sd)

			bysort pid: egen `var'_pre_temp = mean(`var') if post_treatment == 0
			bysort pid: egen `var'_pre = mean(`var'_pre_temp)
			sum `var'_pre if treat_pool == 0 & treat_nap == 0 & post_treatment==0
			gen `var'_pre_std = (`var'_pre - r(mean)) /r(sd)
			drop `var'_pre `var'_pre_temp
		}

		* Regressions
		foreach var in prod typing earnings output {

			cap drop baseline
			egen baseline 	= mean(`var'_pre_std), by(pid)

			*Pooled treatment
			reghdfe `var'_post_std treat_pool_cell treat_nap_cell treat_int_cell baseline i.age fraction_high female long_day if post_treatment==1 & at_present_check==1, absorb(date day_in_study) cluster(pid)

			local pids_`var'_ns = e(N_clust)
			local obs_`var'_ns = e(N)

			lincom _b[treat_pool_cell]
				local coef_`var'_ns = string(r(estimate),"%3.2f")
				local se_`var'_ns = string(r(se),"%3.2f")
				local tstat_`var'_ns = r(estimate)/r(se)
				local p_`var'_ns = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_nap_cell]
				local coef_`var'_nap2 = string(r(estimate),"%3.2f")
				local se_`var'_nap2 = string(r(se),"%3.2f")
				local tstat_`var'_nap2 = r(estimate)/r(se)
				local p_`var'_nap2 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_int_cell]
				local coef_`var'_int = string(r(estimate),"%3.2f")
				local se_`var'_int = string(r(se),"%3.2f")
				local tstat_`var'_int = r(estimate)/r(se)
				local p_`var'_int = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
			* Get pval of differences
			lincom _b[treat_nap_cell] - _b[treat_pool_cell]
				local p_`var'_d_nap_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_pool_cell]
				local p_`var'_d_int_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_nap_cell]
				local p_`var'_d_int_nap = string(r(p), "%3.2f")

			}


**************
* WELL-BEING *
**************

	use "$d/health_dataset.dta", clear
	
	drop treat_pool treat_nap 
	merge 1:1 pid day_in_study using `rand', nogen

	* Psych Wellbeing
	* note: code will only work properly if you run it from preserve to restore in a single run
	preserve

	*residualize
	describe ds_a32_happiness ds_g2_life_possibility ds_g1_satisfaction ds_g13c_stressed es_dep, varlist
	local wb_vars `r(varlist)'
	foreach var in `wb_vars' {
		reghdfe `var'  , absorb(day_in_study date) residuals
		cap drop `var'
		rename _reghdfe_resid `var'
	}

	collapse (mean) ds_a32_happiness ds_g2_life_possibility ds_g1_satisfaction ds_g13c_stressed es_dep (max) treat_pool_cell treat_nap_cell treat_int_cell female age, by(pid post_treatment)

	
* Weighted index (Anderson 2008)

		*Pooled treatments
			* Step 1: define locals for the relevant arguments

				*Components
				local components "ds_a32_happiness ds_g2_life_possibility ds_g1_satisfaction ds_g13c_stressed es_dep"

				*Covariates to be included linearly
				local controls "female"

				*Treatment dummies
				local treatments "treat_pool_cell treat_nap_cell treat_int_cell"

				*Covariates to be included inside "absorb" of reghdfe (if this is empty, add a factor var from controls)
				local factorcovariates "age"

				*Subset
				local subset "if post_treatment==1"

				*Eststo? Y = 1, N = 0
				local eststo 0

			* Step 2: REMEMBER: ALWAYS MAKE SURE SIGNS ARE RIGHT
			replace es_dep = -es_dep
			replace ds_g13c_stressed = -ds_g13c_stressed

			* Step 3: Just run this line!
				anderson_index "`components'" "`treatments'" "`controls'" "`factorcovariates'" "`subset'" `eststo'

				local pids_psych = e(N)
				local obs_psych = e(N)

				lincom _b[treat_pool_cell]
					local coef_psych_ns = string(r(estimate),"%3.2f")
					local se_psych_ns = string(r(se),"%3.2f")
					local tstat_psych_ns = r(estimate)/r(se)
					local p_psych_ns = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

				lincom _b[treat_nap_cell]
					local coef_psych_nap2 = string(r(estimate),"%3.2f")
					local se_psych_nap2 = string(r(se),"%3.2f")
					local tstat_psych_nap2 = r(estimate)/r(se)
					local p_psych_nap2 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

				lincom _b[treat_int_cell]
					local coef_psych_int = string(r(estimate),"%3.2f")
					local se_psych_int = string(r(se),"%3.2f")
					local tstat_psych_int = r(estimate)/r(se)
					local p_psych_int = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
					
				* Get pval of differences
				lincom _b[treat_nap_cell] - _b[treat_pool_cell]
					local p_psych_d_nap_ns = string(r(p), "%3.2f")
				lincom _b[treat_int_cell] - _b[treat_pool_cell]
					local p_psych_d_int_ns = string(r(p), "%3.2f")
				lincom _b[treat_int_cell] - _b[treat_nap_cell]
					local p_psych_d_int_nap = string(r(p), "%3.2f")
						
	tempfile psych
	save `psych', replace

	restore

	* Phys Wellbeing

	local health_vars "distance max_speed win_systolic_bp win_diastolic_bp"

	* Create standardized versions of postline variables
	foreach var in `health_vars' {
		sum `var' if treat_pool == 0 & treat_nap == 0 & post_treatment==1
		gen `var'_post_std = (`var' - r(mean)) / r(sd)
	}

	*Create biking_task and win_bp
	egen biking_task = rowtotal(max_speed_post_std distance_post_std) if post_treatment == 1
	egen nomiss_post = rownonmiss(max_speed_post_std distance_post_std)
	replace biking_task = biking_task/nomiss_post if post_treatment == 1

	egen win_bp = rowtotal(win_systolic_bp_post_std win_diastolic_bp_post_std) if post_treatment == 1
	cap drop nomiss_post
	egen nomiss_post = rownonmiss(win_systolic_bp_post_std win_diastolic_bp_post_std)
	replace win_bp = win_bp/nomiss_post if post_treatment == 1

	* Create standardized versions of baseline variables
	foreach var in `health_vars'{
		bysort pid: egen `var'_pre_temp = mean(`var') if post_treatment == 0
		bysort pid: egen `var'_pre = mean(`var'_pre_temp)
		sum `var'_pre if treat_pool == 0 & treat_nap == 0
		gen `var'_pre_std = (`var'_pre - r(mean)) / r(sd)
		drop `var'_pre `var'_pre_temp
	}

	*Create win_bp (summing blood pressure at pre-treatment)
	egen win_bp_pre = rowtotal(win_systolic_bp_pre_std win_diastolic_bp_pre_std) if post_treatment == 0
	cap drop nomiss_pre
	egen nomiss_pre = rownonmiss(win_systolic_bp_pre_std win_diastolic_bp_pre_std)
	replace win_bp_pre = win_bp_pre/nomiss_pre if post_treatment == 0

	replace win_bp = win_bp_pre if post_treatment == 0
	drop win_bp_pre

	* Adjust sign for different measures
	foreach var of varlist win_bp es_ill_week es_pain es_daily_act{
		replace `var' = -`var'
	}

	* Residualize
	local components "win_bp"
	foreach var in `components' {
		reghdfe `var' , absorb(day_in_study date) residuals
		cap drop `var'
		rename _reghdfe_resid `var'
	}
	local components "biking_task win_bp es_ill_week es_pain es_daily_act"
	collapse (mean) `components' (max) female age treat_pool_cell treat_nap_cell treat_int_cell, by(pid post_treatment)

	preserve
	* Weighted index (Anderson 2008)

	*Pooled treatments
		* Step 1: define locals for the relevant arguments

			*Components
			local components "biking_task win_bp es_ill_week es_pain es_daily_act"

			*Covariates to be included linearly
			local controls "female"

			*Treatment dummies
			local treatments "treat_pool_cell treat_nap_cell treat_int_cell"

			*Subset
			local subset "if post_treatment==1"

			*Covariates to be included inside "absorb" of reghdfe (if this is empty, add a factor var from controls)
			local factorcovariates "age"

			*Eststo? Y = 1, N = 0
			local eststo 0

		* Step 2: REMEMBER: ALWAYS MAKE SURE SIGNS ARE RIGHT

		* Step 3: Just run this line!
			anderson_index "`components'" "`treatments'" "`controls'" "`factorcovariates'" "`subset'" `eststo'

			local pids_physical = e(N)
			local obs_physical = e(N)

			lincom _b[treat_pool_cell]
				local coef_physical_ns = string(r(estimate),"%3.2f")
				local se_physical_ns = string(r(se),"%3.2f")
				local tstat_physical_ns = r(estimate)/r(se)
				local p_physical_ns = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_nap_cell]
				local coef_physical_nap2 = string(r(estimate),"%3.2f")
				local se_physical_nap2 = string(r(se),"%3.2f")
				local tstat_physical_nap2 = r(estimate)/r(se)
				local p_physical_nap2 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_int_cell]
				local coef_physical_int = string(r(estimate),"%3.2f")
				local se_physical_int = string(r(se),"%3.2f")
				local tstat_physical_int = r(estimate)/r(se)
				local p_physical_int = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
			* Get pval of differences
			lincom _b[treat_nap_cell] - _b[treat_pool_cell]
				local p_physical_d_nap_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_pool_cell]
				local p_physical_d_int_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_nap_cell]
				local p_physical_d_int_nap = string(r(p), "%3.2f")

		restore
			
*******************
* WELLBEING INDEX *
*******************

	merge 1:1 pid post_treatment using `psych'
	
	* Weighted index (Anderson 2008)

	*Pooled treatments
		* Step 1: define locals for the relevant arguments

			*Components
			local components "ds_a32_happiness ds_g2_life_possibility ds_g1_satisfaction ds_g13c_stressed es_dep biking_task win_bp es_ill_week es_pain es_daily_act"

			*Covariates to be included linearly
			local controls "female"

			*Treatment dummies
			local treatments "treat_pool_cell treat_nap_cell treat_int_cell"

			*Subset
			local subset "if post_treatment==1"

			*Covariates to be included inside "absorb" of reghdfe (if this is empty, add a factor var from controls)
			local factorcovariates "age"

			*Eststo? Y = 1, N = 0
			local eststo 0

		* Step 2: REMEMBER: ALWAYS MAKE SURE SIGNS ARE RIGHT

		* Step 3: Just run this line!
			anderson_index "`components'" "`treatments'" "`controls'" "`factorcovariates'" "`subset'" `eststo'

			local pids_wellbeing = e(N)
			local obs_wellbeing = e(N)

			lincom _b[treat_pool_cell]
				local coef_wellbeing_ns = string(r(estimate),"%3.2f")
				local se_wellbeing_ns = string(r(se),"%3.2f")
				local tstat_wellbeing_ns = r(estimate)/r(se)
				local p_wellbeing_ns = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_nap_cell]
				local coef_wellbeing_nap2 = string(r(estimate),"%3.2f")
				local se_wellbeing_nap2 = string(r(se),"%3.2f")
				local tstat_wellbeing_nap2 = r(estimate)/r(se)
				local p_wellbeing_nap2 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_int_cell]
				local coef_wellbeing_int = string(r(estimate),"%3.2f")
				local se_wellbeing_int = string(r(se),"%3.2f")
				local tstat_wellbeing_int = r(estimate)/r(se)
				local p_wellbeing_int = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
			* Get pval of differences
			lincom _b[treat_nap_cell] - _b[treat_pool_cell]
				local p_wellbeing_d_nap_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_pool_cell]
				local p_wellbeing_d_int_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_nap_cell]
				local p_wellbeing_d_int_nap = string(r(p), "%3.2f")

*******************************
* RISK AND SOCIAL PREFERENCES *
*******************************

use "$d/riskandsocial_dataset.dta", clear

	drop treat_pool treat_nap 
	merge 1:1 pid day_in_study using `rand', nogen
	
	collapse (mean) risk_switch_point loss_switch_point d_send u_send t_send u_avg_receive t_avg_amountsent (max) female age treat_pool_cell treat_nap_cell treat_int_cell, by(pid post_treatment)

	*** Risk Preferences

	* Weighted index (Anderson 2008)

	* Pooled treatments
		* Step 1: define locals for the relevant arguments

				*Components
				local components "risk_switch_point loss_switch_point"

				*Covariates to be included linearly
				local controls "female"

				*Treatment dummies
				local treatments "treat_pool_cell treat_nap_cell treat_int_cell"

				*Subset
				local subset "if post_treatment==1"

				*Covariates to be included inside "absorb" of reghdfe (if this is empty, add a factor var from controls)
				local factorcovariates "age"

				*Eststo? Y = 1, N = 0
				local eststo 0

		* Step 2: REMEMBER: ALWAYS MAKE SURE SIGNS ARE RIGHT

		* Step 3: Just run this line!
			anderson_index "`components'" "`treatments'" "`controls'" "`factorcovariates'" "`subset'" `eststo'

			local pids_risk = e(N)
			local obs_risk = e(N)

			lincom _b[treat_pool_cell]
				local coef_risk_ns = string(r(estimate),"%3.2f")
				local se_risk_ns = string(r(se),"%3.2f")
				local tstat_risk_ns = r(estimate)/r(se)
				local p_risk_ns = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_nap_cell]
				local coef_risk_nap2 = string(r(estimate),"%3.2f")
				local se_risk_nap2 = string(r(se),"%3.2f")
				local tstat_risk_nap2 = r(estimate)/r(se)
				local p_risk_nap2 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_int_cell]
				local coef_risk_int = string(r(estimate),"%3.2f")
				local se_risk_int = string(r(se),"%3.2f")
				local tstat_risk_int = r(estimate)/r(se)
				local p_risk_int = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
			* Get pval of differences
			lincom _b[treat_nap_cell] - _b[treat_pool_cell]
				local p_risk_d_nap_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_pool_cell]
				local p_risk_d_int_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_nap_cell]
				local p_risk_d_int_nap = string(r(p), "%3.2f")		

	* Weighted index (Anderson 2008)

	*Pooled treatments
		* Step 1: define locals for the relevant arguments

			*Components
			local components "d_send u_send t_send u_avg_receive t_avg_amountsent"

			*Covariates to be included linearly
			local controls "female"

			*Treatment dummies
			local treatments "treat_pool_cell treat_nap_cell treat_int_cell"

			*Subset
			local subset "if post_treatment==1"

			*Covariates to be included inside "absorb" of reghdfe (if this is empty, add a factor var from controls)
			local factorcovariates "age"

			*Eststo? Y = 1, N = 0
			local eststo 0

		* Step 2: REMEMBER: ALWAYS MAKE SURE SIGNS ARE RIGHT

		* Step 3: Just run this line!
			anderson_index "`components'" "`treatments'" "`controls'" "`factorcovariates'" "`subset'" `eststo'

			local pids_social = e(N_clust)
			local obs_social = e(N)

			lincom _b[treat_pool_cell]
				local coef_social_ns = string(r(estimate),"%3.2f")
				local se_social_ns = string(r(se),"%3.2f")
				local tstat_social_ns = r(estimate)/r(se)
				local p_social_ns = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_nap_cell]
				local coef_social_nap2 = string(r(estimate),"%3.2f")
				local se_social_nap2 = string(r(se),"%3.2f")
				local tstat_social_nap2 = r(estimate)/r(se)
				local p_social_nap2 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_int_cell]
				local coef_social_int = string(r(estimate),"%3.2f")
				local se_social_int = string(r(se),"%3.2f")
				local tstat_social_int = r(estimate)/r(se)
				local p_social_int = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
			* Get pval of differences
			lincom _b[treat_nap_cell] - _b[treat_pool_cell]
				local p_social_d_nap_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_pool_cell]
				local p_social_d_int_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_nap_cell]
				local p_social_d_int_nap = string(r(p), "%3.2f")	

	tempfile rs_pref
	save `rs_pref', replace

********************
* TIME PREFERENCES *
********************

	* Present Bias
				
		use `rand', clear
		collapse treat_pool_cell treat_nap_cell treat_int_cell, by(pid)
		tempfile rand2
		save `rand2', replace
		
		use "$d/pb_dataset.dta", clear
		keep if type == "censored"
		drop treat_nap treat_pool
		merge 1:1 pid using `rand2'

		replace beta_post = 2 if beta_post > 2 & beta_post != .
		replace beta_base = 2 if beta_base > 2 & beta_base != .

		sum beta_base if convergence_code_post == 0 //only sample that's included in pb regressions
		replace beta_base = -9999999 if beta_base==.
		gen dummy_missing = (beta_base==-9999999)
		replace beta_base = r(mean) if beta_base==-9999999

		tempfile pb
		save `pb', replace

	* Savings

		use "$d/savings_dataset.dta", clear
		drop treat_pool treat_nap 
		merge 1:1 pid day_in_study using `rand'
		
		forval n=1/20{
			gen rewardrate`n'_dummy = (rewardrate`n' == float(0.02))
			replace rewardrate`n'_dummy = . if rewardrate`n' == .
		}

		egen fraction_high = rowtotal(rewardrate*_dummy)
		egen nomiss = rownonmiss(rewardrate*_dummy)
		replace fraction_high = fraction_high/nomiss

		* Residualized output
		local controls "female i.age risk_and_social_day default_amount rewardrate* max_pay_cog_tasks piece_rate_pb pid_interest"
		reghdfe deposits `controls', vce(cluster pid) absorb(surveyor date day_in_study) residuals
		cap drop deposits
		rename _reghdfe_resid deposits

		merge m:1 pid using `pb', gen(pb_merge)

		gen beta = beta_post
		replace beta = beta_base if post_treatment == 0
		
		collapse deposits beta dummy_missing (max) female age treat_pool_cell treat_nap_cell treat_int_cell, by(pid post_treatment)

	preserve
	* Weighted index (Anderson 2008)

	*Pooled treatments
		* Step 1: define locals for the relevant arguments

			*Components
			local components "deposits beta"

			*Covariates to be included linearly
			local controls "female"

			*Treatment dummies
			local treatments "treat_pool_cell treat_nap_cell treat_int_cell"

			*Covariates to be included inside "absorb" of reghdfe (if this is empty, add a factor var from controls)
			local factorcovariates "i.age"

			*Subset
			local subset "if post_treatment==1"

			*Eststo? Y = 1, N = 0
			local eststo 0

		* Step 2: REMEMBER: ALWAYS MAKE SURE SIGNS ARE RIGHT

		* Step 3: Just run this line!
			anderson_index "`components'" "`treatments'" "`controls'" "`factorcovariates'" "`subset'" `eststo'

			local pids_timepref = e(N_clust)
			local obs_timepref = e(N)

			lincom _b[treat_pool_cell]
				local coef_timepref_ns = string(r(estimate),"%3.2f")
				local se_timepref_ns = string(r(se),"%3.2f")
				local tstat_timepref_ns = r(estimate)/r(se)
				local p_timepref_ns = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_nap_cell]
				local coef_timepref_nap2 = string(r(estimate),"%3.2f")
				local se_timepref_nap2 = string(r(se),"%3.2f")
				local tstat_timepref_nap2 = r(estimate)/r(se)
				local p_timepref_nap2 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_int_cell]
				local coef_timepref_int = string(r(estimate),"%3.2f")
				local se_timepref_int = string(r(se),"%3.2f")
				local tstat_timepref_int = r(estimate)/r(se)
				local p_timepref_int = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
			* Get pval of differences
			lincom _b[treat_nap_cell] - _b[treat_pool_cell]
				local p_timepref_d_nap_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_pool_cell]
				local p_timepref_d_int_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_nap_cell]
				local p_timepref_d_int_nap = string(r(p), "%3.2f")	
	restore
	
*********************
* PREFERENCES INDEX *
*********************

	merge 1:1 pid post_treatment using `rs_pref', gen(rs_merge)

	* Weighted (Anderson)

	*Pooled treatment
		* Step 1: define locals for the relevant arguments

				* Components
				local components "risk_switch_point loss_switch_point d_send u_send t_send u_avg_receive t_avg_amountsent deposits beta"

				* Covariates to be included linearly
				local controls "female dummy_missing"

				* Treatment dummies
				local treatments "treat_pool_cell treat_nap_cell treat_int_cell"

				* Covariates to be included inside "absorb" of reghdfe (if this is empty, add a factor var from controls)
				local factorcovariates "age"

				* Subset
				local subset "if post_treatment==1"

				* Eststo? Y = 1, N = 0
				local eststo 0

		* Step 2: REMEMBER: ALWAYS MAKE SURE SIGNS ARE RIGHT
				* Signs have been changed already to make the average index!

		* Step 3: Just run this line!
				anderson_index "`components'" "`treatments'" "`controls'" "`factorcovariates'" "`subset'" `eststo'

			local pids_pref = e(N_clust)
			local obs_pref = e(N)

			lincom _b[treat_pool_cell]
				local coef_pref_ns = string(r(estimate),"%3.2f")
				local se_pref_ns = string(r(se),"%3.2f")
				local tstat_pref_ns = r(estimate)/r(se)
				local p_pref_ns = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_nap_cell]
				local coef_pref_nap2 = string(r(estimate),"%3.2f")
				local se_pref_nap2 = string(r(se),"%3.2f")
				local tstat_pref_nap2 = r(estimate)/r(se)
				local p_pref_nap2 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_int_cell]
				local coef_pref_int = string(r(estimate),"%3.2f")
				local se_pref_int = string(r(se),"%3.2f")
				local tstat_pref_int = r(estimate)/r(se)
				local p_pref_int = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
			* Get pval of differences
			lincom _b[treat_nap_cell] - _b[treat_pool_cell]
				local p_pref_d_nap_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_pool_cell]
				local p_pref_d_int_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_nap_cell]
				local p_pref_d_int_nap = string(r(p), "%3.2f")		

		
**********************
* COGNITIVE FUNCTION *
**********************

	use "$d/cognitive_tasks_dataset.dta", clear

	merge 1:1 pid day_in_study using "$d/pvt_dataset.dta"
	drop treat_pool treat_nap 
	merge 1:1 pid day_in_study using `rand', nogen

	egen max_pay_dummy = rowtotal(max_pay_hf_dummy max_pay_co_dummy)
	replace max_pay_dummy = 1 if max_pay_dummy == 2

	* Residualize
	local components "pv_perf hf_payment co_payment"
	foreach var in `components' {
		reghdfe `var', absorb(high_pay date day_in_study) residuals
		cap drop `var'
		rename _reghdfe_resid `var'
	}

	collapse `components' (max) female age treat_pool_cell treat_nap_cell treat_int_cell, by(pid post_treatment)

	* Weighted index (Anderson 2008)

	* Pooled treatments
		* Step 1: define locals for the relevant arguments

				* Components
				local components "pv_perf hf_payment co_payment"

				* Covariates to be included linearly
				local controls "female"

				* Treatment dummies
				local treatments "treat_pool_cell treat_nap_cell treat_int_cell"

				* Covariates to be included inside "absorb" of reghdfe (if this is empty, add a factor var from controls)
				local factorcovariates "age"

				* Subset
				local subset "if post_treatment==1"

				* Eststo? Y = 1, N = 0
				local eststo 0

		* Step 2: REMEMBER: ALWAYS MAKE SURE SIGNS ARE RIGHT

		* Step 3: Just run this line!
			anderson_index "`components'" "`treatments'" "`controls'" "`factorcovariates'" "`subset'" `eststo'

			local pids_cogfunction = e(N_clust)
			local obs_cogfunction = e(N)

			lincom _b[treat_pool_cell]
				local coef_cogfunction_ns = string(r(estimate),"%3.2f")
				local se_cogfunction_ns = string(r(se),"%3.2f")
				local tstat_cogfunction_ns = r(estimate)/r(se)
				local p_cogfunction_ns = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_nap_cell]
				local coef_cogfunction_nap2 = string(r(estimate),"%3.2f")
				local se_cogfunction_nap2 = string(r(se),"%3.2f")
				local tstat_cogfunction_nap2 = r(estimate)/r(se)
				local p_cogfunction_nap2 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_int_cell]
				local coef_cogfunction_int = string(r(estimate),"%3.2f")
				local se_cogfunction_int = string(r(se),"%3.2f")
				local tstat_cogfunction_int = r(estimate)/r(se)
				local p_cogfunction_int = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
			* Get pval of differences
			lincom _b[treat_nap_cell] - _b[treat_pool_cell]
				local p_cogfunction_d_nap_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_pool_cell]
				local p_cogfunction_d_int_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_nap_cell]
				local p_cogfunction_d_int_nap = string(r(p), "%3.2f")	

	tempfile cognitive
	save `cognitive', replace

*************
* ATTENTION *
*************

use "$d/salience_dataset.dta", clear

rename treat treat_pool
drop treat_pool treat_nap 
merge m:1 pid day_in_study using `rand', nogen
	
egen tag_pid = tag(pid) if post_treatment==1

* Residualized output
reghdfe output_section , absorb(day_in_study pid date) residuals
cap drop output_res
rename _reghdfe_resid output_res

* Generating average standardized output by high and salient sessions - post treatment
foreach var in  output_res  {

	bys pid: egen _avg_`var'_high_sal = mean(`var') if high==1 & salience==1 & post_treatment==1
	bys pid: egen avg_`var'_high_sal = mean(_avg_`var'_high_sal)

	bys pid: egen _avg_`var'_high_notsal = mean(`var') if high==1 & salience==0 & post_treatment==1
	bys pid: egen avg_`var'_high_notsal = mean(_avg_`var'_high_notsal)

	bys pid: egen _avg_`var'_low_sal = mean(`var') if high==0 & salience==1 & post_treatment==1
	bys pid: egen avg_`var'_low_sal = mean(_avg_`var'_low_sal)

	bys pid: egen _avg_`var'_low_notsal = mean(`var') if high==0 & salience==0 & post_treatment==1
	bys pid: egen avg_`var'_low_notsal = mean(_avg_`var'_low_notsal)

	gen attent_`var' = avg_`var'_high_sal - avg_`var'_high_notsal - (avg_`var'_low_sal - avg_`var'_low_notsal)

}

* Generating average standardized output by high and salient sessions - baseline

foreach var in output_res  {

	bys pid: egen _avg_`var'_high_sal_bas = mean(`var') if high==1 & salience==1 & post_treatment==0
	bys pid: egen avg_`var'_high_sal_bas = mean(_avg_`var'_high_sal_bas)

	bys pid: egen _avg_`var'_high_notsal_bas = mean(`var') if high==1 & salience==0 & post_treatment==0
	bys pid: egen avg_`var'_high_notsal_bas = mean(_avg_`var'_high_notsal_bas)

	bys pid: egen _avg_`var'_low_sal_bas = mean(`var') if high==0 & salience==1 & post_treatment==0
	bys pid: egen avg_`var'_low_sal_bas = mean(_avg_`var'_low_sal_bas)

	bys pid: egen _avg_`var'_low_notsal_bas = mean(`var') if high==0 & salience==0 & post_treatment==0
	bys pid: egen avg_`var'_low_notsal_bas = mean(_avg_`var'_low_notsal_bas)

	gen attent_`var'_bas = avg_`var'_high_sal_bas - avg_`var'_high_notsal_bas - (avg_`var'_low_sal_bas - avg_`var'_low_notsal_bas)

}

* standardize variables

foreach var in attent_output_res  {

	sum `var' if treat_pool==0 & treat_nap==0 & tag_pid == 1
	gen std_`var' = (`var' - `r(mean)')/`r(sd)'

	sum `var'_bas if treat_pool==0 & treat_nap==0 & tag_pid == 1
	gen std_`var'_base = (`var'_bas - `r(mean)')/`r(sd)'

}

	* Changing sign - IMPORTANT
	replace std_attent_output_res = -std_attent_output_res
	replace std_attent_output_res_base = -std_attent_output_res_base

	rename std_attent_output_res_base baseline

		* Collapsing

		collapse (mean) std_attent_output_res  baseline (max) treat_pool_cell treat_nap_cell treat_int_cell age female, by(pid post_treatment)
		
	*Pooled treatments
		reghdfe std_attent_output_res treat_pool_cell treat_nap_cell treat_int_cell baseline if post_treatment == 1, absorb(age female) vce(robust)

		local pids_attention = e(N)
		local obs_attention = e(N)

		lincom _b[treat_pool_cell]
			local coef_attention_ns = string(r(estimate),"%3.2f")
			local se_attention_ns = string(r(se),"%3.2f")
			local tstat_attention_ns = r(estimate)/r(se)
			local p_attention_ns = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

		lincom _b[treat_nap_cell]
			local coef_attention_nap2 = string(r(estimate),"%3.2f")
			local se_attention_nap2 = string(r(se),"%3.2f")
			local tstat_attention_nap2 = r(estimate)/r(se)
			local p_attention_nap2 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

		lincom _b[treat_int_cell]
			local coef_attention_int = string(r(estimate),"%3.2f")
			local se_attention_int = string(r(se),"%3.2f")
			local tstat_attention_int = r(estimate)/r(se)
			local p_attention_int = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
			
		* Get pval of differences
			lincom _b[treat_nap_cell] - _b[treat_pool_cell]
				local p_attention_d_nap_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_pool_cell]
				local p_attention_d_int_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_nap_cell]
				local p_attention_d_int_nap = string(r(p), "%3.2f")	
			
		rename baseline std_attent_output_base

*******************
* COGNITION INDEX *
*******************

	merge 1:1 pid post_treatment using `cognitive', gen(cog_merge)

	replace std_attent_output_res = std_attent_output_base if post_treatment==0

		* Weighted index

	*Pooled treatments
		* Step 1: define locals for the relevant arguments

			*Components
			local components "pv_perf hf_payment co_payment std_attent_output_res"

			*Covariates to be included linearly
			local controls "female"

			*Treatment dummies
			local treatments "treat_pool_cell treat_nap_cell treat_int_cell"

			*Covariates to be included inside "absorb" of reghdfe (if this is empty, add a factor var from controls)
			local factorcovariates "age"

			*Subset
			local subset "if post_treatment==1"

			*Eststo? Y = 1, N = 0
			local eststo 0

		* Step 2: REMEMBER: ALWAYS MAKE SURE SIGNS ARE RIGHT

		* Step 3: Just run this line!
			anderson_index "`components'" "`treatments'" "`controls'" "`factorcovariates'" "`subset'" `eststo'

			foreach var in pv_perf hf_payment co_payment std_attent_output_res {

			sum `var'_post_std if post_treatment==1
			sum `var'_pre_std if post_treatment==0

			}

			foreach var in female treat_pool treat_nap age {

			sum `var' if post_treatment==1, de

			}

			local pids_cogindex = e(N_clust)
			local obs_cogindex = e(N)

			lincom _b[treat_pool_cell]
				local coef_cogindex_ns = string(r(estimate),"%3.2f")
				local se_cogindex_ns = string(r(se),"%3.2f")
				local tstat_cogindex_ns = r(estimate)/r(se)
				local p_cogindex_ns = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_nap_cell]
				local coef_cogindex_nap2 = string(r(estimate),"%3.2f")
				local se_cogindex_nap2 = string(r(se),"%3.2f")
				local tstat_cogindex_nap2 = r(estimate)/r(se)
				local p_cogindex_nap2 = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))

			lincom _b[treat_int_cell]
				local coef_cogindex_int = string(r(estimate),"%3.2f")
				local se_cogindex_int = string(r(se),"%3.2f")
				local tstat_cogindex_int = r(estimate)/r(se)
				local p_cogindex_int = (2 * ttail(e(df_r), abs(r(estimate)/r(se))))
				
			* Get pval of differences
			lincom _b[treat_nap_cell] - _b[treat_pool_cell]
				local p_cogindex_d_nap_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_pool_cell]
				local p_cogindex_d_int_ns = string(r(p), "%3.2f")
			lincom _b[treat_int_cell] - _b[treat_nap_cell]
				local p_cogindex_d_int_nap = string(r(p), "%3.2f")	
		}


***************
* CAPTURING COEFFICIENTS
***************

	foreach var in psych physical wellbeing risk social timepref pref cogfunction cogindex attention prod typing earnings output {
		foreach treat in ns nap2 int {
			gen tstat_`var'_`treat' = `tstat_`var'_`treat''
		}
		foreach type in nap_ns int_ns int_nap {
			gen p_`var'_d_`type' = `p_`var'_d_`type''
		}
	}
	
	di "This part is ok so far"
				
				keep if _n == 1
				keep tstat* p_*
		
				if `x' == 1 {
					save "$d/mht_ri_pvals_table_int_pool.dta", replace
				}
				
				else {
					append using "$d/mht_ri_pvals_table_int_pool.dta"
					save "$d/mht_ri_pvals_table_int_pool.dta", replace
				}
		
		}
				
